library(readxl)#Importing Excel spreadsheets
library(tidyverse)#General Cleaning


# Cleaning inquisit belief updating task
## Catherine Hobbs 18.05.21

#Set working directory below to location of saved raw files
#setwd()

# Importing files
# All paths are relative to the working directory.
## Task export and the two questionnaire exports
Phase1_2_3_3b_DC_Task <- read_excel(path = "Phase1_2_3_3b_DC_Task.xlsx")
Phase1_2_3_3b_DC_Qs_1 <- read_excel(path = "Phase1_2_3_3b_DC_Qs_1.xlsx")
Phase1_2_3_3b_DC_Qs_2 <- read_excel(path = "Phase1_2_3_3b_DC_Qs_2.xlsx")

## Participant list used later to keep only people recruited on Prolific
prolific_IDs <- read_excel(path = "prolific_IDs.xlsx")

# Harmonising the participant ID column (-> subject) across questionnaires
## Qs 1: survey metadata columns get a _Qs_1 suffix, SC1 becomes
## attention_check_Qs and SC0 is dropped
Phase1_2_3_3b_DC_Qs_1_cleaned <- Phase1_2_3_3b_DC_Qs_1 %>%
  rename(
    subject = PROLIFIC_PID,
    StartDate_Qs_1 = StartDate,
    EndDate_Qs_1 = EndDate,
    Status_Qs_1 = Status,
    Progress_Qs_1 = Progress,
    Duration_Qs_1 = `Duration (in seconds)`,
    Finished_Qs_1 = Finished,
    RecordedDate_Qs_1 = RecordedDate,
    ResponseId_Qs_1 = ResponseId,
    DistributionChannel_Qs_1 = DistributionChannel,
    UserLanguage_Qs_1 = UserLanguage,
    Q_RecaptchaScore_Qs_1 = Q_RecaptchaScore,
    correct_ID_Qs_1 = correct_ID,
    ID_correction_Qs_1 = ID_correction,
    attention_check_Qs = SC1
  ) %>%
  select(-SC0)

## Any column name containing "Q141" has that part replaced by "consent"
names(Phase1_2_3_3b_DC_Qs_1_cleaned) <- gsub(
  pattern = "Q141",
  replacement = "consent",
  x = names(Phase1_2_3_3b_DC_Qs_1_cleaned)
)

## Dropping rows with no answer to the first consent item. One participant
## (5fb57ec2517a3e0b16afae3a) has such a blank row alongside a completed one.
Phase1_2_3_3b_DC_Qs_1_cleaned <- filter(
  Phase1_2_3_3b_DC_Qs_1_cleaned,
  !is.na(consent_1)
)

## Qs 2: same metadata renaming with a _Qs_2 suffix, SC0 dropped
Phase1_2_3_3b_DC_Qs_2_cleaned <- Phase1_2_3_3b_DC_Qs_2 %>%
  rename(
    subject = PROLIFIC_PID,
    StartDate_Qs_2 = StartDate,
    EndDate_Qs_2 = EndDate,
    Status_Qs_2 = Status,
    Progress_Qs_2 = Progress,
    Duration_Qs_2 = `Duration (in seconds)`,
    Finished_Qs_2 = Finished,
    RecordedDate_Qs_2 = RecordedDate,
    ResponseId_Qs_2 = ResponseId,
    DistributionChannel_Qs_2 = DistributionChannel,
    UserLanguage_Qs_2 = UserLanguage,
    Q_RecaptchaScore_Qs_2 = Q_RecaptchaScore,
    correct_ID_Qs_2 = correct_ID,
    ID_correction_Qs_2 = ID_correction
  ) %>%
  select(-SC0)

# Removing the pilot run (ID 4dd7ce22bc427f6dfd7fb88f5a036fd4835f35e1) from
# both questionnaires and from the task data.
# Note: filter() also drops rows whose subject is NA.
Phase1_2_3_3b_DC_Qs_1_cleaned <- filter(
  Phase1_2_3_3b_DC_Qs_1_cleaned,
  subject != "4dd7ce22bc427f6dfd7fb88f5a036fd4835f35e1"
)

Phase1_2_3_3b_DC_Qs_2_cleaned <- filter(
  Phase1_2_3_3b_DC_Qs_2_cleaned,
  subject != "4dd7ce22bc427f6dfd7fb88f5a036fd4835f35e1"
)

Phase1_2_3_3b_DC_Task_cleaned <- filter(
  Phase1_2_3_3b_DC_Task,
  subject != "4dd7ce22bc427f6dfd7fb88f5a036fd4835f35e1"
)

# Some participants have two Qs 1 entries: one has a blank entry, others have
# two because of errors starting the task. The newer entry is kept as it will
# match up better with the task.

## Blank entry: the row of this participant with Progress_Qs_1 == 2 is removed
Phase1_2_3_3b_DC_Qs_1_cleaned <- Phase1_2_3_3b_DC_Qs_1_cleaned %>%
  filter(!(subject == "b6c77a2762d3ce8d77da5f74b41b83572ed2e337" & Progress_Qs_1 == 2))

## Repeated entries: for participants with more than one row, the row carrying
## the earliest StartDate_Qs_1 is removed. The per-participant row count
## (n_rows) stays in the data.
Phase1_2_3_3b_DC_Qs_1_cleaned <- Phase1_2_3_3b_DC_Qs_1_cleaned %>%
  group_by(subject) %>%
  mutate(n_rows = n()) %>%
  filter(!(n_rows != 1 & StartDate_Qs_1 == min(StartDate_Qs_1))) %>%
  ungroup()

#Pulling out IDs from each part of data collection
# Each *_IDs table holds the participant ID plus a flag marking that data exist
# for that part. The flag is a logical TRUE; participants without that part end
# up with NA after the merges below.
Phase1_2_3_3b_DC_Qs_1_IDs <- Phase1_2_3_3b_DC_Qs_1_cleaned %>%
  select(subject) %>%
  mutate(Qs_1_data_available = TRUE)

Phase1_2_3_3b_DC_Qs_2_IDs <- Phase1_2_3_3b_DC_Qs_2_cleaned %>%
  select(subject) %>%
  mutate(Qs_2_data_available = TRUE)

# The task data have many rows per participant, so only the first row of each
# is kept. Built from the pilot-free task data so the pilot run is not
# reintroduced.
Phase1_2_3_3b_DC_Task_IDs <- Phase1_2_3_3b_DC_Task_cleaned %>%
  select(subject) %>%
  mutate(Task_data_available = TRUE) %>%
  group_by(subject) %>%
  slice(1) %>%
  ungroup()

#Merging IDs to get a list of who completed what
IDs <- merge(Phase1_2_3_3b_DC_Qs_1_IDs, Phase1_2_3_3b_DC_Task_IDs, by = "subject", all = TRUE)
IDs <- merge(IDs, Phase1_2_3_3b_DC_Qs_2_IDs, by = "subject", all.x = TRUE)

#Merging with list of Prolific IDs
IDs <- merge(IDs, prolific_IDs, by = "subject", all = TRUE)

#Keeping only people recruited on Prolific (other will have been piloting)
Prolific_IDs_data <- IDs %>%
  filter(!is.na(prolific_recruited))

#Merging
# Every merge is a left join onto the Prolific-recruited participants, so task
# or questionnaire rows from anyone else are not added back in.
Phase_1_DC_Task_Qs_cleaned <- merge(Prolific_IDs_data, Phase1_2_3_3b_DC_Qs_1_cleaned, by = "subject", all.x = TRUE)
Phase_1_DC_Task_Qs_cleaned <- merge(Phase_1_DC_Task_Qs_cleaned, Phase1_2_3_3b_DC_Task_cleaned, by = "subject", all.x = TRUE)
Phase_1_DC_Task_Qs_cleaned <- merge(Phase_1_DC_Task_Qs_cleaned, Phase1_2_3_3b_DC_Qs_2_cleaned, by = "subject", all.x = TRUE) #Lost some people at the last bit of Qs, am keeping as will be useful for the main analysis


# Task Attention Checks --------------------------------------------------------
## Task rows whose trialcode contains "attention"
attention_checks_task <- Phase_1_DC_Task_Qs_cleaned %>%
  filter(grepl("attention", trialcode))

## One row per attention trial. passed = 1 when the response is the expected
## answer for that check, 0 otherwise (NA when the response is missing).
individual_attention_trials <- attention_checks_task %>%
  select(
    subject, trialcode, trialnum, response, correct,
    propCorrect_AttentionChecks, numberCorrect_AttentionChecks
  ) %>%
  arrange(subject, trialnum) %>%
  mutate(
    passed = ifelse(
      (trialcode == "attentionCheck1" & response == "disagree") |
        (trialcode == "attentionCheck2" & response == "10") |
        (trialcode == "attentionCheck3" & response == "agree") |
        (trialcode == "attentionCheck4" & response == "25") |
        (trialcode == "attentionCheck5" & response == "no") |
        (trialcode == "attentionCheck6" & response == "yes"),
      1,
      0
    )
  ) %>%
  group_by(subject) %>%
  mutate(attention_trials_n = n()) %>%
  ungroup()

## Per participant: number and percentage of task attention checks passed
attention_check_task_overall <- individual_attention_trials %>%
  group_by(subject) %>%
  summarise(
    total_attention_passed = sum(passed, na.rm = TRUE),
    attention_trials_n = mean(attention_trials_n)
  ) %>%
  mutate(
    percent_attention_passed = total_attention_passed / attention_trials_n * 100,
    attention_checks_passed = percent_attention_passed >= 100
  ) %>%
  ungroup()

## Questionnaire attention check score, taken from each participant's first row
attention_check_questionnaires <- Phase_1_DC_Task_Qs_cleaned %>%
  select(subject, attention_check_Qs) %>%
  group_by(subject) %>%
  slice(1) %>%
  ungroup()

## Task + questionnaire checks combined; all passed means a total of 8
attention_check_combined <- attention_check_task_overall %>%
  merge(attention_check_questionnaires, by = "subject") %>%
  mutate(
    total_attention_passed = total_attention_passed + attention_check_Qs,
    attention_checks_passed = total_attention_passed == 8
  ) %>%
  select(subject, total_attention_passed, attention_checks_passed)

# Checking whether participants completed the task ------------------------
## Number of rows per participant in the merged task/questionnaire data
number_trials <- Phase_1_DC_Task_Qs_cleaned %>%
  group_by(subject) %>%
  summarise(trial_n = n()) %>%
  ungroup()

## For inspection: participants with fewer than 1290 rows
task_not_completed <- filter(number_trials, trial_n < 1290)


# IDs that can be used ----------------------------------------------------
# Participants need Qs 1 and task data (some were lost at the 2nd set of Qs but
# are kept, as the main questionnaires & task are available), at least 7
# attention checks passed and more than 1290 rows.
# NOTE(review): a participant with exactly 1290 rows is neither listed in
# task_not_completed (< 1290) nor kept here (> 1290) - confirm the boundary.
# Only the key info needed to merge with task & questionnaire data is retained.
complete_data_IDs <- Prolific_IDs_data %>%
  filter(Qs_1_data_available == TRUE, Task_data_available == TRUE) %>%
  merge(attention_check_combined, by = "subject", all.x = TRUE) %>%
  merge(number_trials, by = "subject", all.x = TRUE) %>%
  filter(total_attention_passed >= 7, trial_n > 1290) %>%
  select(subject, Phase, Group, total_attention_passed)


# Cleaning Task Data ------------------------------------------------------

# Estimates ---------------------------------------------------------------

## Task rows of the eligible participants, with each block labelled by the
## keyword found in blockcode (NA when none of the keywords is present)
Task_eligible_ps <- complete_data_IDs %>%
  merge(Phase1_2_3_3b_DC_Task_cleaned, by = "subject", all.x = TRUE) %>%
  mutate(
    block_type = ifelse(
      grepl("estimate1", blockcode), "Initial Estimate",
      ifelse(
        grepl("estimate2", blockcode), "Second Estimate",
        ifelse(
          grepl("recall", blockcode), "Recall",
          ifelse(grepl("ratings", blockcode), "Ratings", NA)
        )
      )
    )
  )

## First estimate for each life event
trial_estimates_initial <- Task_eligible_ps %>%
  filter(trialcode == "lifeEventProbability_estimate1") %>%
  select(
    subject, group, lifeEvent, valence, list, happenCondition,
    p_H, p_NH, p_estimate1, RT_estimate1
  )

## Participant/life event pairs with more than one first estimate
## (should have 0 rows)
multiple_initial_estimates <- trial_estimates_initial %>%
  group_by(subject, lifeEvent) %>%
  mutate(row_n = n()) %>%
  filter(row_n > 1)


## Second estimate for each life event
trial_estimates_updated <- Task_eligible_ps %>%
  filter(
    block_type == "Second Estimate",
    trialcode == "lifeEventProbability_estimate2"
  ) %>%
  select(subject, lifeEvent, p_estimate2, RT_estimate2)

## Participant/life event pairs with more than one second estimate
## (should have 0 rows)
multiple_updated_estimates <- trial_estimates_updated %>%
  group_by(subject, lifeEvent) %>%
  mutate(row_n = n()) %>%
  filter(row_n > 1)

## Both estimates side by side, one row per participant and life event
trial_estimates <- merge(
  trial_estimates_initial,
  trial_estimates_updated,
  by = c("subject", "lifeEvent")
)


trial_estimates <- trial_estimates %>%
  # Identifying life events with missing estimates
  mutate(missing_estimate = ifelse(is.na(p_estimate1) | is.na(p_estimate2), 1, 0)) %>%
  # Number of trials and of missing estimates overall, by valence and by
  # valence x list (trial counts should be 80, 40, 20 respectively)
  group_by(subject) %>%
  mutate(
    overall_trial_n = n(),
    overall_n_missing_estimate = sum(missing_estimate)
  ) %>%
  group_by(subject, valence) %>%
  mutate(
    valence_trial_n = n(),
    valence_n_missing_estimate = sum(missing_estimate)
  ) %>%
  group_by(subject, valence, list) %>%
  mutate(
    valence_list_trial_n = n(),
    valence_list_missing_estimate = sum(missing_estimate)
  ) %>%
  # Checking single life event per participant (should be 1)
  group_by(subject, lifeEvent) %>%
  mutate(lifeEvent_n = n()) %>%
  ungroup() %>%
  mutate(
    # Not-happening events (happenCondition == 2) are subtracted from 100 so
    # they are in the same direction as happening events
    p_estimate1 = ifelse(happenCondition == 2, 100 - p_estimate1, p_estimate1),
    p_estimate2 = ifelse(happenCondition == 2, 100 - p_estimate2, p_estimate2),
    # Estimation error = initial estimate minus the actual probability
    estimation_error = p_estimate1 - p_H,
    # Each trial is categorised by whether the actual probability presented
    # is desirable or undesirable, given the event's valence
    desirability = ifelse(
      is.na(estimation_error), "Missing Estimate",
      ifelse(
        valence == "negative" & estimation_error > 0, "desirable",
        ifelse(
          valence == "negative" & estimation_error < 0, "undesirable",
          ifelse(
            valence == "positive" & estimation_error > 0, "undesirable",
            ifelse(
              valence == "positive" & estimation_error < 0, "desirable",
              ifelse(estimation_error == 0, "Initial Estimate = Probability Happening", NA)
            )
          )
        )
      )
    ),
    # Raw update = initial estimate minus re-estimate
    update_inital_calculation = p_estimate1 - p_estimate2,
    # Did the re-estimate move towards or away from the actual probability
    # (irrespective of valence and desirability)? NA when neither applies.
    update_direction = ifelse(
      p_H > p_estimate1 & p_estimate2 > p_estimate1, "towards",
      ifelse(
        p_H > p_estimate1 & p_estimate2 < p_estimate1, "away",
        ifelse(
          p_H < p_estimate1 & p_estimate2 > p_estimate1, "away",
          ifelse(p_H < p_estimate1 & p_estimate2 < p_estimate1, "towards", NA)
        )
      )
    ),
    # Sign correction: updates towards the actual probability should be
    # positive, updates away from it negative
    update = ifelse(
      update_direction == "towards" & update_inital_calculation < 0,
      update_inital_calculation * -1,
      ifelse(
        update_direction == "away" & update_inital_calculation > 0,
        update_inital_calculation * -1,
        update_inital_calculation
      )
    )
  ) %>%
  # Counting number of trials classified as desirable/undesirable by valence
  group_by(subject, valence, desirability) %>%
  mutate(valence_desirability_n = n()) %>%
  ungroup()



# Recall ------------------------------------------------------------------
trial_recall <- Task_eligible_ps %>%
  filter(trialcode == "recall") %>%
  select(subject, lifeEvent, happenCondition, p_H, p_recall, RT_recall) %>%
  mutate(
    # Not-happening events are subtracted from 100 to be in the same
    # direction as happening events
    p_recall = ifelse(happenCondition == 2, 100 - p_recall, p_recall),
    # Memory error = actual probability minus the recalled probability
    memory_errors = p_H - p_recall
  ) %>%
  select(-happenCondition, -p_H) %>%
  # Rows per participant and life event, used for the check below
  group_by(subject, lifeEvent) %>%
  mutate(lifeEvent_n = n()) %>%
  ungroup()

# Checking 1 life event per participant
summary(trial_recall[["lifeEvent_n"]])

trial_recall <- select(trial_recall, -lifeEvent_n)


# Ratings -----------------------------------------------------------------
trial_ratings <- Task_eligible_ps %>%
  filter(trialcode == "rating") %>%
  select(
    subject, lifeEvent,
    rating_controllability, rating_emotionalarousal, rating_familiarity,
    rating_negativity, rating_positivity, rating_priorexperience,
    rating_vividness,
    RT_controllability, RT_emotionalarousal, RT_familiarity,
    RT_negativity, RT_positivity, RT_priorexperience, RT_vividness
  ) %>%
  group_by(subject, lifeEvent) %>%
  # Keeping the 7th rating row of each life event: by then all ratings have
  # been updated for that event (earlier rows still carry values brought
  # forward from the previous life event)
  filter(row_number() == 7) %>%
  # Rows per participant and life event, used for the check below
  mutate(lifeEvent_n = n()) %>%
  ungroup()

# Checking 1 life event per participant
summary(trial_ratings[["lifeEvent_n"]])

trial_ratings <- select(trial_ratings, -lifeEvent_n)


# Merging & collapsing ----------------------------------------------------
## Estimates, recall and ratings joined per participant and life event
event_level_data <- trial_estimates %>%
  merge(trial_recall, by = c("subject", "lifeEvent")) %>%
  merge(trial_ratings, by = c("subject", "lifeEvent"))

## Identifying whether a trial is eligible: estimation error != 0, both
## estimates provided, and both estimates between 3 and 77
event_level_data <- event_level_data %>%
  mutate(
    eligible_trial = ifelse(
      desirability == "Initial Estimate = Probability Happening",
      "Estimation Error of Zero",
      ifelse(
        missing_estimate == 1,
        "Missing Participant Estimate",
        ifelse(
          p_estimate1 < 3 | p_estimate1 > 77 | p_estimate2 < 3 | p_estimate2 > 77,
          "Outside Range",
          "Eligible"
        )
      )
    )
  )

## Selecting relevant columns (group -> task_group) and creating a basic
## desirability variable for descriptives (1 = desirable, 2 = undesirable)
event_level_data <- event_level_data %>%
  select(
    subject, lifeEvent, task_group = group, valence, list, happenCondition,
    eligible_trial, p_H, p_NH, p_estimate1, RT_estimate1, p_estimate2,
    RT_estimate2, estimation_error, desirability_trial_type = desirability,
    update_inital_calculation, update_direction, update,
    p_recall, RT_recall, memory_errors,
    rating_controllability, rating_emotionalarousal, rating_familiarity,
    rating_negativity, rating_positivity, rating_priorexperience,
    rating_vividness,
    RT_controllability, RT_emotionalarousal, RT_familiarity,
    RT_negativity, RT_positivity, RT_priorexperience, RT_vividness
  ) %>%
  mutate(
    desirability = ifelse(
      desirability_trial_type == "desirable", 1,
      ifelse(desirability_trial_type == "undesirable", 2, NA)
    )
  ) %>%
  select(-desirability_trial_type) %>%
  select(
    subject, lifeEvent, task_group, valence, list, happenCondition,
    eligible_trial, desirability, everything()
  )

## Converting the categorical columns to factors
event_level_data <- event_level_data %>%
  mutate(
    desirability = factor(
      desirability,
      levels = c(1, 2),
      labels = c("Desirable", "Undesirable")
    ),
    valence = factor(valence),
    task_group = factor(task_group),
    list = factor(list),
    happenCondition = factor(
      happenCondition,
      levels = c(1, 2),
      labels = c("Happening", "Not Happening")
    ),
    eligible_trial = factor(eligible_trial),
    update_direction = factor(update_direction)
  )



###########Questionnaires##############

# Pre Task ----------------------------------------------------------------

## Pre-task questionnaire of the eligible participants.
## consented is "Yes" only when all six consent items equal 1; age_1 and
## gender_3_TEXT are given descriptive names.
Q1_clean <- complete_data_IDs %>%
  merge(Phase1_2_3_3b_DC_Qs_1_cleaned, by = "subject", all.x = TRUE) %>%
  mutate(
    consented = factor(
      ifelse(
        consent_1 == 1 & consent_2 == 1 & consent_3 == 1 &
          consent_4 == 1 & consent_5 == 1 & consent_6 == 1,
        1,
        0
      ),
      levels = c(1, 0),
      labels = c("Yes", "No")
    ),
    gender = factor(
      gender,
      levels = c(1, 2, 3),
      labels = c("Male", "Female", "Other")
    )
  ) %>%
  rename(age = age_1, gender_other_specific = gender_3_TEXT)

## Post-task questionnaire of the eligible participants
Q2_clean <- complete_data_IDs %>%
  merge(Phase1_2_3_3b_DC_Qs_2_cleaned, by = "subject", all.x = TRUE)

# Ethnicity & employment status
# Note: error in coding employment on qualtrics (code 5 is missing); the
# levels below skip 5 so the labels still line up.
# employment_aggregated merges full/part time into one category; "Employed
# and Student" and "Other" both go into Other.
Q1_clean <- Q1_clean %>%
  mutate(
    ethnicity = factor(
      ethnicity,
      levels = c(1, 2, 3, 4, 5),
      labels = c("White", "Black", "Asian", "Mixed", "Other")
    ),
    employment = factor(
      employment,
      levels = c(1, 2, 3, 4, 6, 7, 8),
      labels = c(
        "Unemployed", "Employed Full-Time", "Employed Part-Time",
        "Student Full-Time", "Student Part-Time", "Employed and Student",
        "Other"
      )
    ),
    employment_aggregated = ifelse(
      employment == "Employed Full-Time" | employment == "Employed Part-Time", 1,
      ifelse(
        employment == "Student Full-Time" | employment == "Student Part-Time", 2,
        ifelse(employment == "Unemployed", 3, 4)
      )
    ),
    employment_aggregated = factor(
      employment_aggregated,
      levels = c(1, 2, 3, 4),
      labels = c("Employed", "Student", "Unemployed", "Other")
    )
  )


# Education
# The aggregated version is derived from the numeric codes first, then both
# variables are converted to labelled factors.
Q1_clean <- Q1_clean %>%
  mutate(
    education_aggregated = ifelse(
      education == 1, 1,
      ifelse(
        education == 2 | education == 3, 2,
        ifelse(
          education == 4, 3,
          ifelse(education == 5 | education == 6, 4, 5)
        )
      )
    ),
    education = factor(
      education,
      levels = c(1, 2, 3, 4, 5, 6, 7),
      labels = c(
        "Primary School", "GCSEs or equivalent", "A-Levels or equivalent",
        "Undergraduate degree", "Postgraduate degree", "Doctoral degree",
        "Other"
      )
    ),
    education_aggregated = factor(
      education_aggregated,
      levels = c(1, 2, 3, 4, 5),
      labels = c(
        "Primary Education", "Secondary Education", "Degree",
        "Higher Degree", "Other"
      )
    )
  )

# Cross-tab to check the aggregation
table(Q1_clean$education, Q1_clean$education_aggregated)


# Relationship status, living situation & experience of depression
# (relationship is called Q136 in the pilot; converted for actual data
# collection)
Q1_clean <- Q1_clean %>%
  rename(
    relationship_other_specific = relationship_3_TEXT,
    depression = depression_experienc
  ) %>%
  mutate(
    # Some p.s put married as other: any free-text answer containing
    # "Married" is recoded to 2 (in a relationship)
    relationship = factor(
      ifelse(grepl("Married", relationship_other_specific), 2, relationship),
      levels = c(1, 2, 3),
      labels = c("Single", "In a relationship", "Other")
    ),
    living = factor(
      living,
      levels = c(1, 2, 3, 4),
      labels = c("Home Owner", "Renting", "Living with a relative/friend", "Other")
    ),
    depression = factor(
      depression,
      levels = c(1, 2, 3),
      labels = c("Yes, currently", "In the past, but not currently", "No")
    )
  )

# Treatment for depression & family history of depression
# The *_ever and *_curr variables are single-level factors: "Yes" when
# endorsed, NA otherwise.
Q1_clean <- Q1_clean %>%
  mutate(
    # depression_treatment containing "1" -> psychological therapy ever,
    # containing "2" -> antidepressants ever
    psych_ther_ever = factor(
      ifelse(grepl("1", depression_treatment), 1, NA),
      levels = c(1),
      labels = c("Yes")
    ),
    antid_ever = factor(
      ifelse(grepl("2", depression_treatment), 1, NA),
      levels = c(1),
      labels = c("Yes")
    )
  ) %>%
  rename(
    other_ther_ever_specific = depression_treatment_4_TEXT,
    antid_curr = depression_antid,
    psych_ther_curr = depression_psych,
    depress_family_hist = depression_family
  ) %>%
  mutate(
    antid_curr = factor(
      ifelse(antid_curr == 1, 1, NA),
      levels = c(1),
      labels = c("Yes")
    ),
    psych_ther_curr = factor(
      ifelse(psych_ther_curr == 1, 1, NA),
      levels = c(1),
      labels = c("Yes")
    ),
    depress_family_hist = factor(
      depress_family_hist,
      levels = c(1, 2),
      labels = c("Yes", "No")
    )
  )

#Questionnaires

## Qualtrics automatically scores response options from 1 upwards; this shifts
## values down by one for questionnaires that should be scored from 0.
## x: numeric vector. Returns x - 1 (NA stays NA).
minus_1 <- function(x) x - 1

## Shifting PHQ, BDI, GAD and LOT items down by one.
## contains() matches case-insensitively by default, so every column whose name
## includes the text is shifted.
Q1_clean <- Q1_clean %>%
  mutate(across(contains("PHQ"), minus_1)) %>%
  mutate(across(contains("BDI"), minus_1)) %>%
  mutate(across(contains("GAD"), minus_1)) %>%
  mutate(across(contains("LOT"), minus_1))

## Reverse coding
## BFNE items 2, 4, 7 and 10 are reversed on the 1-5 scale.
## LOT-R: you're meant to recode 3, 7 and 9, but it was coded in qualtrics so
## that low values = high optimism when it should be the other way around, so
## items 1, 4 and 10 are reversed instead on the 0-4 scale (2, 5, 6, 8 are
## fillers and are not recoded).
Q1_clean <- Q1_clean %>%
  mutate(
    BFNE2_recode = dplyr::recode(BFNE2, `1` = 5L, `2` = 4L, `3` = 3L, `4` = 2L, `5` = 1L),
    BFNE4_recode = dplyr::recode(BFNE4, `1` = 5L, `2` = 4L, `3` = 3L, `4` = 2L, `5` = 1L),
    BFNE7_recode = dplyr::recode(BFNE7, `1` = 5L, `2` = 4L, `3` = 3L, `4` = 2L, `5` = 1L),
    BFNE10_recode = dplyr::recode(BFNE10, `1` = 5L, `2` = 4L, `3` = 3L, `4` = 2L, `5` = 1L),
    LOT1_recode = dplyr::recode(LOT1, `0` = 4L, `1` = 3L, `2` = 2L, `3` = 1L, `4` = 0L),
    LOT4_recode = dplyr::recode(LOT4, `0` = 4L, `1` = 3L, `2` = 2L, `3` = 1L, `4` = 0L),
    LOT10_recode = dplyr::recode(LOT10, `0` = 4L, `1` = 3L, `2` = 2L, `3` = 1L, `4` = 0L)
  )

## Summing qs (a missing item makes the total NA)
Q1_clean <- Q1_clean %>%
  mutate(
    BDI_tot = BDI1 + BDI2 + BDI3 + BDI4 + BDI5 + BDI6 + BDI7 +
      BDI8 + BDI9 + BDI10 + BDI11 + BDI12 + BDI13 + BDI14 +
      BDI15 + BDI16 + BDI17 + BDI18 + BDI19 + BDI20 + BDI21,
    PHQ_tot = PHQ1 + PHQ2 + PHQ3 + PHQ4 + PHQ5 + PHQ6 + PHQ7 + PHQ8 + PHQ9,
    GAD_tot = GAD1 + GAD2 + GAD3 + GAD4 + GAD5 + GAD6 + GAD7,
    BFNE_tot = BFNE1 + BFNE2_recode + BFNE3 + BFNE4_recode + BFNE5 + BFNE6 +
      BFNE7_recode + BFNE8 + BFNE9 + BFNE10_recode + BFNE11 + BFNE12,
    # Items 2, 5, 6 and 8 of the LOT are fillers
    LOT_tot = LOT1_recode + LOT3 + LOT4_recode + LOT7 + LOT9 + LOT10_recode,
    PANAS_pos_1 = PANAS1.1 + PANAS1.3 + PANAS1.5 + PANAS1.9 + PANAS1.10 +
      PANAS1.12 + PANAS1.14 + PANAS1.16 + PANAS1.17 + PANAS1.19,
    PANAS_neg_1 = PANAS1.2 + PANAS1.4 + PANAS1.6 + PANAS1.7 + PANAS1.8 +
      PANAS1.11 + PANAS1.13 + PANAS1.15 + PANAS1.18 + PANAS1.20
  )



# Selecting & ordering variables
# Item columns are listed by generating their names (e.g. BDI1 ... BDI21).
Q1_clean <- Q1_clean %>%
  select(all_of(c(
    "subject", "consented", "StartDate_Qs_1", "EndDate_Qs_1",
    "age", "gender", "gender_other_specific", "ethnicity",
    "employment", "employment_aggregated",
    "education", "education_aggregated",
    "relationship", "relationship_other_specific", "living",
    "depression", "psych_ther_ever", "antid_ever",
    "psych_ther_curr", "antid_curr", "depress_family_hist",
    paste0("BDI", 1:21), "BDI_tot",
    paste0("PHQ", 1:9), "PHQ_tot",
    paste0("GAD", 1:7), "GAD_tot",
    "BFNE1", "BFNE2", "BFNE2_recode", "BFNE3", "BFNE4", "BFNE4_recode",
    "BFNE5", "BFNE6", "BFNE7", "BFNE7_recode", "BFNE8", "BFNE9",
    "BFNE10", "BFNE10_recode", "BFNE11", "BFNE12", "BFNE_tot",
    "LOT1", "LOT1_recode", "LOT2", "LOT3", "LOT4", "LOT4_recode",
    "LOT5", "LOT6", "LOT7", "LOT8", "LOT9", "LOT10", "LOT10_recode",
    "LOT_tot",
    paste0("PANAS1.", 1:20), "PANAS_pos_1", "PANAS_neg_1"
  )))



# Post Task Qs ------------------------------------------------------------

# Labelled debriefing response, post-task PANAS positive/negative totals, then
# the variables to keep in their final order
Q2_clean <- Q2_clean %>%
  mutate(
    debrief_factor = factor(
      debrief,
      levels = c(1, 2, 3, 4, 5),
      labels = c(
        "Strongly Agree", "Somewhat Agree", "Neither agree nor disagree",
        "Somewhat disagree", "Strongly disagree"
      )
    ),
    PANAS_pos_2 = PANAS2.1 + PANAS2.3 + PANAS2.5 + PANAS2.9 + PANAS2.10 +
      PANAS2.12 + PANAS2.14 + PANAS2.16 + PANAS2.17 + PANAS2.19,
    PANAS_neg_2 = PANAS2.2 + PANAS2.4 + PANAS2.6 + PANAS2.7 + PANAS2.8 +
      PANAS2.11 + PANAS2.13 + PANAS2.15 + PANAS2.18 + PANAS2.20
  ) %>%
  select(all_of(c(
    "subject", "StartDate_Qs_2", "EndDate_Qs_2",
    paste0("PANAS2.", 1:20), "PANAS_pos_2", "PANAS_neg_2",
    "debrief", "debrief_factor"
  )))



# Merging together questionnaires -----------------------------------------
# Pre- and post-task questionnaires side by side, plus PANAS change scores
# (post-task minus pre-task)
Qs_clean <- merge(Q1_clean, Q2_clean, by = "subject") %>%
  mutate(
    PANAS_pos_change = PANAS_pos_2 - PANAS_pos_1,
    PANAS_neg_change = PANAS_neg_2 - PANAS_neg_1
  )

# Merging back together with the key participant info (incl. attention checks)
Qs_clean <- complete_data_IDs %>%
  merge(Qs_clean, by = "subject", all.x = TRUE)


# Adding in screening PHQ scores ------------------------------------------

Healthy_screening <- read_excel(path = "Healthy_screening.xlsx")
Depressed_screening <- read_excel(path = "Depressed_screening.xlsx")

# Both screening files stacked; PHQ columns shifted down by one with minus_1
# and summed into a screening total, keeping only ID and total
recruitment_data <- bind_rows(Healthy_screening, Depressed_screening) %>%
  mutate(across(contains("PHQ"), minus_1)) %>%
  mutate(
    screening_PHQ_tot = PHQ1 + PHQ2 + PHQ3 + PHQ4 + PHQ5 + PHQ6 + PHQ7 + PHQ8 + PHQ9
  ) %>%
  select(subject = PROLIFIC_PID, screening_PHQ_tot)

# Every questionnaire row is kept, whether or not a screening score exists
Qs_clean <- merge(recruitment_data, Qs_clean, by = "subject", all.y = TRUE)

# Making group a factor: "Healthy" -> Healthy, anything else -> Depression
Qs_clean <- Qs_clean %>%
  mutate(
    Group_factor = factor(
      ifelse(Group == "Healthy", 1, 2),
      levels = c(1, 2),
      labels = c("Healthy", "Depression")
    )
  ) %>%
  select(-Group) %>%
  rename(Group = Group_factor)


# Creating Separate Dataframes for p.s that passed all attention --------
Qs_clean_attention_passed <- filter(Qs_clean, total_attention_passed == 8)

# Renaming dataframes to reflect eligible p.s: the full set (7 or more checks
# passed) moves to Qs_clean_reduced_attention, and Qs_clean now holds only
# those who passed all 8
Qs_clean_reduced_attention <- Qs_clean
Qs_clean <- Qs_clean_attention_passed


# Merging with task data --------------------------------------------------

## Eligible p.s only (passed all attention checks)
task_event_level_qs <- event_level_data %>%
  merge(Qs_clean, by = "subject") %>%
  select(subject, Group, PHQ_tot, BDI_tot, everything()) %>%
  arrange(Group, subject, valence, happenCondition, desirability, lifeEvent)

# Checking correct no. p.s (should be 110)
length(unique(task_event_level_qs[["subject"]]))

# Saving data
write.csv(task_event_level_qs, "task_event_level_qs.csv", row.names = FALSE)
save(task_event_level_qs, file = "task_event_level_qs.RData")

## All p.s (7 or more attention checks passed)
task_event_level_qs_reduced_attention <- event_level_data %>%
  merge(Qs_clean_reduced_attention, by = "subject")

# Checking correct no. p.s (should be 129)
length(unique(task_event_level_qs_reduced_attention[["subject"]]))

task_event_level_qs_reduced_attention <- task_event_level_qs_reduced_attention %>%
  select(subject, Group, PHQ_tot, BDI_tot, everything()) %>%
  arrange(Group, subject, valence, happenCondition, desirability, lifeEvent)

# Saving data
write.csv(task_event_level_qs_reduced_attention, "task_event_level_qs_reduced_attention.csv", row.names = FALSE)
save(task_event_level_qs_reduced_attention, file = "task_event_level_qs_reduced_attention.RData")

